library(tidycensus)
library(tidyverse)
library(tmap)
library(sf)
# Register the Census API key for this session; replace the placeholder
# string with your own key before running.
census_api_key("YOUR API KEY GOES HERE")

# Load the variable catalogue for the 2010 decennial Census Summary File 1
# and open it in the data viewer so variable codes can be looked up.
decennial_variables <- load_variables(year = 2010, dataset = "sf1")
View(decennial_variables)
| name | label | concept |
|---|---|---|
| H001001 | Total | HOUSING UNITS |
| H002001 | Total | URBAN AND RURAL |
| H002002 | Total!!Urban | URBAN AND RURAL |
| H002003 | Total!!Urban!!Inside urbanized areas | URBAN AND RURAL |
| H002004 | Total!!Urban!!Inside urban clusters | URBAN AND RURAL |
| H002005 | Total!!Rural | URBAN AND RURAL |
Let’s find out the population, by state, in 2010:
# Total population (SF1 variable P001001) for every state in the 2010
# decennial Census. `geometry = TRUE` attaches state boundaries so the
# result can be mapped later.
# NOTE(review): recent tidycensus releases flag `shift_geo` as deprecated in
# favour of tigris::shift_geometry() -- confirm against the installed version.
state_population_2010 <- get_decennial(
  year = 2010,
  geography = "state",
  variables = "P001001",
  geometry = TRUE,
  shift_geo = TRUE
)
View(state_population_2010)
| GEOID | NAME | variable | value | geometry |
|---|---|---|---|---|
| 04 | Arizona | P001001 | 6392017 | MULTIPOLYGON (((-1111066 -8… |
| 05 | Arkansas | P001001 | 2915918 | MULTIPOLYGON (((557903.1 -1… |
| 06 | California | P001001 | 37253956 | MULTIPOLYGON (((-1853480 -9… |
| 08 | Colorado | P001001 | 5029196 | MULTIPOLYGON (((-613452.9 -… |
| 09 | Connecticut | P001001 | 3574097 | MULTIPOLYGON (((2226838 519… |
| 11 | District of Columbia | P001001 | 601723 | MULTIPOLYGON (((1960720 -41… |
We can adjust the geography and year parameters; let’s say we want the population distribution across Colorado counties in the year 2010, based on the 2010 decennial census:
# Total population (P001001) for each Colorado county in the 2010 decennial
# Census. No geometry is requested, so the result is a plain table.
CO_county_population_2010 <- get_decennial(
  year = 2010,
  state = "CO",
  geography = "county",
  variables = "P001001"
)
View(CO_county_population_2010)
| GEOID | NAME | variable | value |
|---|---|---|---|
| 08023 | Costilla County, Colorado | P001001 | 3524 |
| 08025 | Crowley County, Colorado | P001001 | 5823 |
| 08027 | Custer County, Colorado | P001001 | 4255 |
| 08029 | Delta County, Colorado | P001001 | 30952 |
| 08031 | Denver County, Colorado | P001001 | 600158 |
| 08035 | Douglas County, Colorado | P001001 | 285465 |
Let’s clean up the dataset of Colorado’s 2010 county-level population distribution that we just created; we’ll remove the “variable” column, and rename the “value” column as “population”. We can perform these operations by calling functions from the “dplyr” package. In the following code, we are editing the CO_county_population_2010 dataset (note that the changes on the right hand side of the assignment operator are being applied to the CO_county_population_2010 dataset, and that we’re not creating a new object). The first line of code following the assignment operator (the <- symbol) calls the CO_county_population_2010 dataset, and establishes that the subsequent operations are being applied to this dataset. After calling the dataset of interest, we use the pipe operator %>% to indicate that we want the second line of code (beginning with mutate) to be applied to the dataset we called in the first line (the pipe operator can be translated into ordinary language as “and then”, i.e. “take the ‘CO_county_population_2010’ dataset and then apply the second line of code to this dataset”).
The mutate function is a dplyr function that can be used to create new variables in a dataset based on existing variables, as well as delete variables by setting the variable name to “NULL.” Here, the variable we want to delete is named “variable”, so we call the mutate function, and write variable=NULL in parentheses to delete the “variable” field in the CO_county_population_2010 dataset. We then use the %>% operator once again to take the dataset that results after deleting the “variable” field, and then use the rename function to rename the existing “value” field as “population.”
# Tidy the county table in place: drop the constant `variable` column
# (setting a column to NULL inside mutate() removes it) and give the
# `value` column a descriptive name.
CO_county_population_2010 <-
  CO_county_population_2010 %>%
  mutate(variable = NULL) %>%
  rename(population = value)
View(CO_county_population_2010)
| GEOID | NAME | population |
|---|---|---|
| 08023 | Costilla County, Colorado | 3524 |
| 08025 | Crowley County, Colorado | 5823 |
| 08027 | Custer County, Colorado | 4255 |
| 08029 | Delta County, Colorado | 30952 |
| 08031 | Denver County, Colorado | 600158 |
| 08035 | Douglas County, Colorado | 285465 |
It’s also possible to call multiple variables into a single table. To see this, let’s add a field/column containing the rural population in each state in 2010 (as well as the total population in that year), and order the dataset in descending order with respect to the rural population (such that the state with the largest rural population will appear as the first record in the dataset):
# Request two 2010 decennial variables per state at once. `output = "wide"`
# returns one column per variable, which is what lets them be renamed
# individually below. Rows are ordered from the largest rural population
# to the smallest.
state_pop_ruralpop_2010 <-
  get_decennial(
    year = 2010,
    geography = "state",
    variables = c("P001001", "P002005"),
    output = "wide"
  ) %>%
  rename(
    total_population = P001001,
    rural_population = P002005
  ) %>%
  arrange(desc(rural_population))
state_pop_ruralpop_2010
| GEOID | NAME | total_population | rural_population |
|---|---|---|---|
| 48 | Texas | 25145561 | 3847522 |
| 37 | North Carolina | 9535483 | 3233727 |
| 42 | Pennsylvania | 12702379 | 2711092 |
| 39 | Ohio | 11536504 | 2546810 |
| 26 | Michigan | 9883640 | 2513683 |
| 13 | Georgia | 9687653 | 2415502 |
Let’s generate a new variable based on the variables we already have in the dataset. This variable will measure the percentage of each state’s population that are rural residents (calculated by dividing the rural population by the total population, and multiplying by 100). We’ll call this variable “rural_pct”. We’ll also re-sort the dataset, so that it’s sorted in descending order with respect to the new “rural_pct” variable, instead of the actual number of rural residents:
# Add the rural share of each state's population (as a percentage) and
# re-order the table from the most rural state to the least rural one.
state_pop_ruralpop_2010 <-
  state_pop_ruralpop_2010 %>%
  mutate(rural_pct = (rural_population / total_population) * 100) %>%
  arrange(desc(rural_pct))
View(state_pop_ruralpop_2010)
| GEOID | NAME | total_population | rural_population | rural_pct |
|---|---|---|---|---|
| 23 | Maine | 1328361 | 814819 | 61.34018 |
| 50 | Vermont | 625741 | 382356 | 61.10451 |
| 54 | West Virginia | 1852994 | 950184 | 51.27831 |
| 28 | Mississippi | 2967297 | 1503073 | 50.65462 |
| 30 | Montana | 989415 | 436401 | 44.10697 |
| 05 | Arkansas | 2915918 | 1278329 | 43.83968 |
The dplyr package also makes it easy to filter datasets based on specific criteria, which we can then assign to a new object. For example, let’s say that we want to generate a new dataset that only includes states whose rural populations are greater than 40% of their overall populations. We’ll assign this new dataset to an object called “rural_pct_over40”:
# Keep only the states whose rural share is strictly above 40 percent and
# store them as a separate object.
rural_pct_over40 <-
  state_pop_ruralpop_2010 %>%
  filter(rural_pct > 40)
View(rural_pct_over40)
| GEOID | NAME | total_population | rural_population | rural_pct |
|---|---|---|---|---|
| 23 | Maine | 1328361 | 814819 | 61.34018 |
| 50 | Vermont | 625741 | 382356 | 61.10451 |
| 54 | West Virginia | 1852994 | 950184 | 51.27831 |
| 28 | Mississippi | 2967297 | 1503073 | 50.65462 |
| 30 | Montana | 989415 | 436401 | 44.10697 |
| 05 | Arkansas | 2915918 | 1278329 | 43.83968 |
| 46 | South Dakota | 814180 | 352933 | 43.34828 |
| 21 | Kentucky | 4339367 | 1806024 | 41.61953 |
| 01 | Alabama | 4779736 | 1957932 | 40.96318 |
| 38 | North Dakota | 672591 | 269719 | 40.10149 |
Create a dataset of Colorado counties that had a rural population that exceeded 50% of the county’s overall population in 2010, and sort the dataset in descending order with respect to the field containing information on the percentage of the county’s residents who are rural. Your final dataset should look something like this:
| GEOID | NAME | total_population | rural_population | rural_pct |
|---|---|---|---|---|
| 08023 | Costilla County, Colorado | 3524 | 3524 | 100.00000 |
| 08025 | Crowley County, Colorado | 5823 | 5823 | 100.00000 |
| 08027 | Custer County, Colorado | 4255 | 4255 | 100.00000 |
| 08033 | Dolores County, Colorado | 2064 | 2064 | 100.00000 |
| 08039 | Elbert County, Colorado | 23086 | 23086 | 100.00000 |
| 08047 | Gilpin County, Colorado | 5441 | 5441 | 100.00000 |
| 08053 | Hinsdale County, Colorado | 843 | 843 | 100.00000 |
| 08057 | Jackson County, Colorado | 1394 | 1394 | 100.00000 |
| 08061 | Kiowa County, Colorado | 1398 | 1398 | 100.00000 |
| 08073 | Lincoln County, Colorado | 5467 | 5467 | 100.00000 |
| 08079 | Mineral County, Colorado | 712 | 712 | 100.00000 |
| 08091 | Ouray County, Colorado | 4436 | 4436 | 100.00000 |
| 08093 | Park County, Colorado | 16206 | 16206 | 100.00000 |
| 08095 | Phillips County, Colorado | 4442 | 4442 | 100.00000 |
| 08111 | San Juan County, Colorado | 699 | 699 | 100.00000 |
| 08109 | Saguache County, Colorado | 6108 | 6108 | 100.00000 |
| 08103 | Rio Blanco County, Colorado | 6666 | 6666 | 100.00000 |
| 08113 | San Miguel County, Colorado | 7359 | 7359 | 100.00000 |
| 08115 | Sedgwick County, Colorado | 2379 | 2379 | 100.00000 |
| 08121 | Washington County, Colorado | 4814 | 4814 | 100.00000 |
| 08009 | Baca County, Colorado | 3788 | 3788 | 100.00000 |
| 08021 | Conejos County, Colorado | 8256 | 8256 | 100.00000 |
| 08017 | Cheyenne County, Colorado | 1836 | 1836 | 100.00000 |
| 08019 | Clear Creek County, Colorado | 9088 | 9088 | 100.00000 |
| 08049 | Grand County, Colorado | 14843 | 12260 | 82.59786 |
| 08083 | Montezuma County, Colorado | 25535 | 17155 | 67.18230 |
| 08125 | Yuma County, Colorado | 10043 | 6519 | 64.91088 |
| 08029 | Delta County, Colorado | 30952 | 19553 | 63.17201 |
| 08119 | Teller County, Colorado | 23350 | 14618 | 62.60385 |
| 08105 | Rio Grande County, Colorado | 11982 | 7493 | 62.53547 |
| 08067 | La Plata County, Colorado | 51334 | 30774 | 59.94857 |
| 08007 | Archuleta County, Colorado | 12084 | 7175 | 59.37603 |
| 08051 | Gunnison County, Colorado | 15324 | 8981 | 58.60741 |
| 08055 | Huerfano County, Colorado | 6711 | 3768 | 56.14662 |
##More advanced data wrangling
##Iteration, Temporal Dynamics, and Exploratory Visualization
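Let’s use iteration to retrieve each state’s total and rural population from both the 2000 and 2010 decennial censuses, and then calculate how the rural share of each state’s population changed between those two years: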
# Census years to iterate over.
my_years <- c(2000, 2010)

# For each year, download state-level total (P001001) and rural (P002005)
# population in wide format, add the rural share as a percentage, and sort
# by state name. The result is a list with one table per year, in the same
# order as `my_years`.
population_rural_2000_2010 <- map(
  my_years,
  function(census_year) {
    get_decennial(
      geography = "state",
      variables = c("P001001", "P002005"),
      output = "wide",
      year = census_year
    ) %>%
      mutate(rural_pct = (P002005 / P001001) * 100) %>%
      arrange(NAME)
  }
)
## Getting data from the 2000 decennial Census
## Using Census Summary File 1
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
# Label the list elements by census year so they can be pulled out by name.
names(population_rural_2000_2010) <- my_years

# Join the two years on state name. Columns that exist in both tables get a
# year suffix, so the subtraction below reads as "2010 share minus 2000
# share" (a percentage-point difference). Only the state name and that
# difference are kept.
rural_change <-
  full_join(
    population_rural_2000_2010[["2000"]],
    population_rural_2000_2010[["2010"]],
    by = "NAME",
    suffix = c("_2000", "_2010")
  ) %>%
  mutate(rural_pct_change = rural_pct_2010 - rural_pct_2000) %>%
  select(NAME, rural_pct_change)
rural_change
## # A tibble: 52 x 2
## NAME rural_pct_change
## <chr> <dbl>
## 1 Alabama -3.59
## 2 Alaska -0.421
## 3 Arizona -1.64
## 4 Arkansas -3.64
## 5 California -0.509
## 6 Colorado -1.68
## 7 Connecticut -0.252
## 8 Delaware -3.18
## 9 District of Columbia 0
## 10 Florida -1.88
## # … with 42 more rows
# Horizontal bar chart of the change in each state's rural population share,
# with states ordered by the size of the change. coord_flip() puts the state
# names on the vertical axis so they stay readable.
basegraph <- rural_change %>%
  ggplot(aes(x = reorder(NAME, rural_pct_change), y = rural_pct_change)) +
  geom_col() +
  coord_flip()

# `rural_pct_change` is the difference between two percentage shares, i.e. a
# percentage-point change in the rural share -- not a percent change in the
# number of rural residents -- so the axis label says so explicitly.
basegraph +
  labs(
    title = "Rural Depopulation",
    x = "State Name",
    y = "Percentage-Point Change in Rural Share of Population, 2000-2010"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Attach the rural-share change to the state geometries by state name.
# A full join keeps rows of `rural_change` that have no matching NAME in
# `state_population_2010`; such rows carry no geometry, which is presumably
# what triggers tmap's "contains empty units" warning -- TODO confirm.
rural_depop_tomap <- full_join(
  state_population_2010,
  rural_change,
  by = "NAME"
)

# First-pass choropleth: six classes chosen with the "jenks" style.
foundational_map <-
  tm_shape(rural_depop_tomap) +
  tm_polygons(
    col = "rural_pct_change",
    n = 6,
    style = "jenks",
    palette = "BuGn",
    midpoint = TRUE
  )
foundational_map
## Warning: The shape rural_depop_tomap contains empty units.
##custom breaks and title
# Same data as the first map, but with hand-picked class breaks, a different
# palette, no frame, a main title, and the legend placed outside the map.
revised_map <-
  tm_shape(rural_depop_tomap) +
  tm_polygons(
    col = "rural_pct_change",
    breaks = c(-6, -4, -2, 0, 1, 2),
    palette = "YlGnBu",
    midpoint = TRUE
  ) +
  tm_layout(
    frame = FALSE,
    main.title = "Percentage Point Change\nin Rural Population, By State",
    main.title.position = "left",
    legend.outside = TRUE
  )
revised_map
## Warning: The shape rural_depop_tomap contains empty units.
Practice visualizing Census data by doing ONE of the following: 1) make a map (using the tmap package) that shows county-level variation in the median age across the state of Colorado or 2) make a visualization (using the ggplot2 package) of state-level variation in the median age across the entire United States.
# Median age (SF1 variable P013001) for each Colorado county in 2010, with
# county geometries attached. The value column is renamed and NAME is moved
# to the front of the table.
median_age_CO <-
  get_decennial(
    year = 2010,
    state = "CO",
    geography = "county",
    variables = "P013001",
    geometry = TRUE
  ) %>%
  rename(median_age = value) %>%
  relocate(NAME)

# Choropleth of median age using fixed five-year class breaks.
# NOTE(review): the breaks only span 30-50; check how counties with a median
# age outside that range are drawn.
median_age_CO_map <-
  tm_shape(median_age_CO) +
  tm_polygons(
    col = "median_age",
    breaks = c(30, 35, 40, 45, 50),
    palette = "YlGnBu",
    midpoint = TRUE
  ) +
  tm_layout(
    frame = FALSE,
    main.title = "Median Age by County,\nColorado",
    main.title.position = "left",
    legend.outside = TRUE
  )
median_age_CO_map
Making a Web Map
# Switch tmap to its interactive "view" mode, then print the same map object
# again so it is drawn as a web map.
tmap_mode(mode = "view")
median_age_CO_map
# Dot plot of median age by county, with counties ordered by median age.
# The full "County, State" name is used as the axis label here.
median_age_CO_visualization <-
  ggplot(
    median_age_CO,
    aes(x = median_age, y = reorder(NAME, median_age))
  ) +
  geom_point() +
  labs(
    title = "Median Age by County, CO",
    x = "Median Age",
    y = "County Name"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
median_age_CO_visualization
# Build a short county label for plotting by stripping the trailing
# " County, Colorado" from NAME (e.g. "Adams County, Colorado" -> "Adams").
# The pattern is anchored to the end of the string, so it removes only the
# suffix and leaves no stray whitespace behind; removing "Colorado", ","
# and "County" separately left trailing spaces in every label.
median_age_CO_cleaned <- median_age_CO %>%
  mutate(County_Name = str_remove(NAME, "\\s*County,\\s*Colorado$"))

# Same dot plot as before, now labelled with the short county names.
median_age_CO_cleaned_visualization <-
  median_age_CO_cleaned %>%
  ggplot(aes(x = median_age, y = reorder(County_Name, median_age))) +
  geom_point() +
  labs(
    title = "Median Age by County, CO",
    x = "Median Age",
    y = "County"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
median_age_CO_cleaned_visualization
To inspect the variable list for the ACS, use the “load variables” function. Let’s say we want to work with the 5-year ACS ending in 2019:
# Load the variable catalogue for the 5-year ACS ending in 2019 and open it
# in the data viewer.
ACS_5_2019 <- load_variables(year = 2019, dataset = "acs5")
View(ACS_5_2019)
| name | label | concept |
|---|---|---|
| B01001_001 | Estimate!!Total: | SEX BY AGE |
| B01001_002 | Estimate!!Total:!!Male: | SEX BY AGE |
| B01001_003 | Estimate!!Total:!!Male:!!Under 5 years | SEX BY AGE |
| B01001_004 | Estimate!!Total:!!Male:!!5 to 9 years | SEX BY AGE |
| B01001_005 | Estimate!!Total:!!Male:!!10 to 14 years | SEX BY AGE |
| B01001_006 | Estimate!!Total:!!Male:!!15 to 17 years | SEX BY AGE |
Let’s issue a call to the API and generate a table that gives us the median income of each county in the United States. Here we only need the tabular data, so we leave the geometry parameter at its default; if we later wanted the option of mapping these data, we could set the geometry parameter equal to TRUE. Note that when using the “get_acs” function, the default setting will return data from the 5-year ACS that terminates in the specified year (i.e. if the year parameter is set to 2019, the function will return the 2015-2019 ACS). If we want to call the 1-year or 3-year ACS instead, the “survey” argument of the “get_acs” function can be set to “acs1” or “acs3”, depending on which survey we are interested in.
# Median household income (B19013_001) for every county, requested with
# year = 2019. The estimate column is renamed and the table is sorted from
# the highest-income county to the lowest.
median_income <-
  get_acs(
    year = 2019,
    geography = "county",
    variables = "B19013_001"
  ) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))
View(median_income)
| GEOID | NAME | variable | median_income | moe |
|---|---|---|---|---|
| 51107 | Loudoun County, Virginia | B19013_001 | 142299 | 2089 |
| 51610 | Falls Church city, Virginia | B19013_001 | 127610 | 16144 |
| 51059 | Fairfax County, Virginia | B19013_001 | 124831 | 1281 |
| 06085 | Santa Clara County, California | B19013_001 | 124055 | 1117 |
| 06081 | San Mateo County, California | B19013_001 | 122641 | 1680 |
| 35028 | Los Alamos County, New Mexico | B19013_001 | 121324 | 4613 |
Let’s say that we want to generate a table that contains the highest median-income county for each state. To do so, we will use dplyr’s “group_by” and “slice” functions, after separating out the “NAME” field in the existing table (which is in the form “County Name, State”) into separate “County” and “State” fields:
# Highest median-income county within each state.
#  1. Split NAME ("County, State") on ", " so that State does not keep the
#     leading space that splitting on "," alone leaves behind.
#  2. Group by state, sort by income (descending) and keep the first row of
#     each group.
#  3. Drop the grouping so later steps are not silently computed per state.
#  4. Rebuild NAME with the same ", " separator while keeping the separate
#     County and State columns (remove = FALSE).
highest_income_counties <- median_income %>%
  separate(NAME, c("County", "State"), sep = ", ") %>%
  group_by(State) %>%
  arrange(desc(median_income)) %>%
  slice(1) %>%
  ungroup() %>%
  unite(NAME, c("County", "State"), remove = FALSE, sep = ", ")
View(highest_income_counties)

# kable() lives in knitr, which this script never attaches, so it is called
# with an explicit namespace.
knitr::kable(highest_income_counties)
| GEOID | NAME | County | State | variable | median_income | moe |
|---|---|---|---|---|---|---|
| 01117 | Shelby County, Alabama | Shelby County | Alabama | B19013_001 | 77799 | 2248 |
| 02110 | Juneau City and Borough, Alaska | Juneau City and Borough | Alaska | B19013_001 | 88390 | 4059 |
| 04013 | Maricopa County, Arizona | Maricopa County | Arizona | B19013_001 | 64468 | 326 |
| 05007 | Benton County, Arkansas | Benton County | Arkansas | B19013_001 | 66362 | 1292 |
| 06085 | Santa Clara County, California | Santa Clara County | California | B19013_001 | 124055 | 1117 |
| 08035 | Douglas County, Colorado | Douglas County | Colorado | B19013_001 | 119730 | 1710 |
| 09001 | Fairfield County, Connecticut | Fairfield County | Connecticut | B19013_001 | 95645 | 1039 |
| 10003 | New Castle County, Delaware | New Castle County | Delaware | B19013_001 | 73892 | 1210 |
| 11001 | District of Columbia, District of Columbia | District of Columbia | District of Columbia | B19013_001 | 86420 | 1008 |
| 12109 | St. Johns County, Florida | St. Johns County | Florida | B19013_001 | 82252 | 2741 |
| 13117 | Forsyth County, Georgia | Forsyth County | Georgia | B19013_001 | 107218 | 2004 |
| 15003 | Honolulu County, Hawaii | Honolulu County | Hawaii | B19013_001 | 85857 | 907 |
| 16081 | Teton County, Idaho | Teton County | Idaho | B19013_001 | 74216 | 3576 |
| 17093 | Kendall County, Illinois | Kendall County | Illinois | B19013_001 | 96563 | 4721 |
| 18057 | Hamilton County, Indiana | Hamilton County | Indiana | B19013_001 | 98173 | 2249 |
| 19049 | Dallas County, Iowa | Dallas County | Iowa | B19013_001 | 88479 | 3234 |
| 20091 | Johnson County, Kansas | Johnson County | Kansas | B19013_001 | 89087 | 998 |
| 21185 | Oldham County, Kentucky | Oldham County | Kentucky | B19013_001 | 99128 | 3974 |
| 22005 | Ascension Parish, Louisiana | Ascension Parish | Louisiana | B19013_001 | 80527 | 3017 |
| 23005 | Cumberland County, Maine | Cumberland County | Maine | B19013_001 | 73072 | 1427 |
| 24027 | Howard County, Maryland | Howard County | Maryland | B19013_001 | 121160 | 2169 |
| 25019 | Nantucket County, Massachusetts | Nantucket County | Massachusetts | B19013_001 | 107717 | 5735 |
| 26093 | Livingston County, Michigan | Livingston County | Michigan | B19013_001 | 84221 | 1674 |
| 27139 | Scott County, Minnesota | Scott County | Minnesota | B19013_001 | 102152 | 3021 |
| 28089 | Madison County, Mississippi | Madison County | Mississippi | B19013_001 | 71824 | 2728 |
| 29183 | St. Charles County, Missouri | St. Charles County | Missouri | B19013_001 | 84978 | 1195 |
| 30043 | Jefferson County, Montana | Jefferson County | Montana | B19013_001 | 69646 | 4258 |
| 31153 | Sarpy County, Nebraska | Sarpy County | Nebraska | B19013_001 | 82032 | 1552 |
| 32015 | Lander County, Nevada | Lander County | Nevada | B19013_001 | 88030 | 21398 |
| 33015 | Rockingham County, New Hampshire | Rockingham County | New Hampshire | B19013_001 | 93756 | 1893 |
| 34027 | Morris County, New Jersey | Morris County | New Jersey | B19013_001 | 115527 | 1813 |
| 35028 | Los Alamos County, New Mexico | Los Alamos County | New Mexico | B19013_001 | 121324 | 4613 |
| 36059 | Nassau County, New York | Nassau County | New York | B19013_001 | 116100 | 1093 |
| 37183 | Wake County, North Carolina | Wake County | North Carolina | B19013_001 | 80591 | 822 |
| 38105 | Williams County, North Dakota | Williams County | North Dakota | B19013_001 | 87161 | 7443 |
| 39041 | Delaware County, Ohio | Delaware County | Ohio | B19013_001 | 106908 | 2786 |
| 40017 | Canadian County, Oklahoma | Canadian County | Oklahoma | B19013_001 | 72056 | 1690 |
| 41067 | Washington County, Oregon | Washington County | Oregon | B19013_001 | 82215 | 997 |
| 42029 | Chester County, Pennsylvania | Chester County | Pennsylvania | B19013_001 | 100214 | 1232 |
| 72061 | Guaynabo Municipio, Puerto Rico | Guaynabo Municipio | Puerto Rico | B19013_001 | 35928 | 1674 |
| 44009 | Washington County, Rhode Island | Washington County | Rhode Island | B19013_001 | 85531 | 2042 |
| 45013 | Beaufort County, South Carolina | Beaufort County | South Carolina | B19013_001 | 68377 | 1987 |
| 46083 | Lincoln County, South Dakota | Lincoln County | South Dakota | B19013_001 | 82473 | 2951 |
| 47187 | Williamson County, Tennessee | Williamson County | Tennessee | B19013_001 | 112962 | 2976 |
| 48397 | Rockwall County, Texas | Rockwall County | Texas | B19013_001 | 100920 | 4011 |
| 49043 | Summit County, Utah | Summit County | Utah | B19013_001 | 102958 | 5613 |
| 50007 | Chittenden County, Vermont | Chittenden County | Vermont | B19013_001 | 73647 | 2249 |
| 51107 | Loudoun County, Virginia | Loudoun County | Virginia | B19013_001 | 142299 | 2089 |
| 53033 | King County, Washington | King County | Washington | B19013_001 | 94974 | 726 |
| 54037 | Jefferson County, West Virginia | Jefferson County | West Virginia | B19013_001 | 80430 | 3750 |
| 55133 | Waukesha County, Wisconsin | Waukesha County | Wisconsin | B19013_001 | 87277 | 1110 |
| 56039 | Teton County, Wyoming | Teton County | Wyoming | B19013_001 | 84678 | 8230 |
If we want to visualize this information, we can incorporate the MOE for these estimates into the visualization, so that we can convey the uncertainty surrounding these median income estimates.
# Dot-and-whisker plot of the top-income county in each state. The points
# are the ACS estimates, and the horizontal bars span estimate +/- margin of
# error to show the uncertainty around each estimate.
highest_income_counties_viz <-
  ggplot(
    highest_income_counties,
    aes(x = median_income, y = reorder(NAME, median_income))
  ) +
  geom_errorbarh(
    aes(xmin = median_income - moe, xmax = median_income + moe)
  ) +
  geom_point(color = "red", size = 3) +
  labs(
    title = "County with Highest Median Income, by State",
    y = "",
    x = "Median Income Estimate from 5-year ACS\n(bars indicate margin of error)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
highest_income_counties_viz
Student Exercise: Generate a visualization of median income by county in Colorado, using the 2014-2018 ACS:
# Median household income (B19013_001) for each Colorado county from the
# 5-year ACS ending in 2018, sorted from highest to lowest.
median_income_CO_2018 <- get_acs(
  geography = "county",
  state = "CO",
  variables = "B19013_001",
  year = 2018
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))

# Dot-and-whisker plot of every Colorado county's median income with its
# margin of error. County labels are shortened by removing the trailing
# " County, Colorado"; the pattern is anchored to the end of the string so
# no stray whitespace is left in the labels (removing "Colorado", "," and
# "County" separately left trailing spaces).
highest_income_counties_CO_2018_viz <-
  median_income_CO_2018 %>%
  mutate(County_Name = str_remove(NAME, "\\s*County,\\s*Colorado$")) %>%
  ggplot(aes(x = median_income, y = reorder(County_Name, median_income))) +
  geom_errorbarh(aes(xmin = median_income - moe, xmax = median_income + moe)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "Median Income in Colorado, by County (2018)",
    y = "",
    x = "Median Income Estimate from 5 year ACS\n(Bars indicate margin of error)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
highest_income_counties_CO_2018_viz
Student Exercise: Make an interactive nationwide map of median income by state in 2016
# State-level median household income (B19013_001) requested with
# year = 2016, with state geometries attached for mapping. Sorted by income
# with NAME moved to the first column.
median_income_2016_states <-
  get_acs(
    year = 2016,
    geography = "state",
    variables = "B19013_001",
    geometry = TRUE
  ) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income)) %>%
  relocate(NAME)

# Six-class choropleth using the "fisher" classification style.
median_income_map <-
  tm_shape(median_income_2016_states) +
  tm_polygons(
    col = "median_income",
    n = 6,
    style = "fisher",
    palette = "YlOrBr"
  )

# Render the map interactively.
tmap_mode(mode = "view")
median_income_map
Exploratory Correlations: Relationship Between County Median Income and County Share of Population Aged 25 and Over Without at Least a Bachelor’s Degree (5 Year ACS ending in 2018)
# ACS table B15003 cells for the four degree levels at or above a
# bachelor's. The vector names become the values of the `variable` column
# in the result.
education_vars <- c(
  Bachelors = "B15003_022",
  Masters = "B15003_023",
  Professional = "B15003_024",
  Doctorate = "B15003_025"
)

# County-level counts for each degree level, with the table total
# (B15003_001) attached to every row as `summary_est` for use as the
# denominator. The year is pinned to 2018 so the data match the object
# name and the 2018 median-income table they are joined to later; when
# `year` is omitted the vintage depends on the installed tidycensus default.
# NOTE(review): the output tables rendered after this chunk were produced
# with the package default year, so re-running may give slightly different
# estimates.
education_acs_2018 <- get_acs(
  geography = "county",
  variables = education_vars,
  summary_var = "B15003_001",
  year = 2018
)
## Getting data from the 2014-2018 5-year ACS
View(education_acs_2018)
| GEOID | NAME | variable | estimate | moe | summary_est | summary_moe |
|---|---|---|---|---|---|---|
| 01001 | Autauga County, Alabama | Bachelors | 6019 | 622 | 37367 | 201 |
| 01001 | Autauga County, Alabama | Masters | 2875 | 412 | 37367 | 201 |
| 01001 | Autauga County, Alabama | Professional | 499 | 187 | 37367 | 201 |
| 01001 | Autauga County, Alabama | Doctorate | 536 | 199 | 37367 | 201 |
| 01003 | Baldwin County, Alabama | Bachelors | 31801 | 1609 | 151112 | 326 |
| 01003 | Baldwin County, Alabama | Masters | 11812 | 935 | 151112 | 326 |
# Percentage of each county's B15003 total that falls outside the four
# degree categories. Within a county, sum(estimate) adds up the four degree
# counts, so every row of the group gets the same no_degree_pct;
# summarising with mean() then collapses each county to a single row.
pct_less_than_BA <-
  education_acs_2018 %>%
  group_by(GEOID) %>%
  mutate(
    no_degree_pct = ((summary_est - sum(estimate)) / (summary_est) * 100)
  ) %>%
  summarize(no_degree_pct = mean(no_degree_pct))
View(pct_less_than_BA)
| GEOID | no_degree_pct |
|---|---|
| 01001 | 73.42843 |
| 01003 | 68.13754 |
| 01005 | 88.42129 |
| 01007 | 89.62147 |
| 01009 | 86.90659 |
| 01011 | 87.94536 |
# County median household income (B19013_001) from the 5-year ACS ending in
# 2018, sorted from highest to lowest. This overwrites the `median_income`
# object created earlier for 2019.
median_income <- get_acs(
  geography = "county",
  variables = "B19013_001",
  year = 2018
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))
## Getting data from the 2014-2018 5-year ACS

# Combine income and education by county GEOID. A full join keeps counties
# that appear in only one of the two tables (with NA in the other's columns).
median_income_nodegree <- full_join(
  median_income,
  pct_less_than_BA,
  by = "GEOID"
)
View(median_income_nodegree)

# kable() lives in knitr, which this script never attaches, so it is called
# with an explicit namespace.
knitr::kable(head(median_income_nodegree))
| GEOID | NAME | variable | median_income | moe | no_degree_pct |
|---|---|---|---|---|---|
| 51107 | Loudoun County, Virginia | B19013_001 | 136268 | 2063 | 38.65637 |
| 51610 | Falls Church city, Virginia | B19013_001 | 124796 | 15295 | 22.44259 |
| 51059 | Fairfax County, Virginia | B19013_001 | 121133 | 1144 | 38.44545 |
| 24027 | Howard County, Maryland | B19013_001 | 117730 | 2023 | 37.43706 |
| 51013 | Arlington County, Virginia | B19013_001 | 117374 | 2067 | 24.70069 |
| 06085 | Santa Clara County, California | B19013_001 | 116178 | 938 | 47.59985 |
# Scatterplot of county median income against the share of residents without
# at least a bachelor's degree, with a fitted linear trend line.
# The education counts are shares of the B15003_001 total, whose universe is
# the population aged 25 and over, so the axis label names that population
# rather than "Under-25".
medianincome_nodegree_viz <- median_income_nodegree %>%
  ggplot() +
  geom_point(aes(x = no_degree_pct, y = median_income)) +
  geom_smooth(aes(x = no_degree_pct, y = median_income), method = "lm") +
  ylab("Estimated County Median Income") +
  xlab("Estimated Percentage of Population Aged 25 and Over Without at Least a Bachelor's Degree")
medianincome_nodegree_viz
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Split NAME ("County, State") into separate columns. Splitting on ", "
# (comma plus space) keeps State free of the leading space that splitting on
# "," alone leaves behind, so facet labels and any later comparison such as
# State == "Colorado" behave as expected.
median_income_nodegree <- median_income_nodegree %>%
  separate(NAME, c("County", "State"), sep = ", ")

# The same income-vs-education scatterplot with a linear fit, drawn as one
# panel per state.
medianincome_nodegree_bystate_viz <- median_income_nodegree %>%
  ggplot() +
  geom_point(aes(x = no_degree_pct, y = median_income)) +
  geom_smooth(aes(x = no_degree_pct, y = median_income), method = "lm") +
  ylab("County Median Income") +
  xlab("Pct No Degree") +
  facet_wrap(~State)
medianincome_nodegree_bystate_viz
Colorado Covid Cases
# Read the Colorado county Covid table from the Desktop folder.
# The file is addressed by its full path instead of changing the working
# directory: setwd() returns the previous directory, so the nested
# setwd(setwd("~/Desktop")) switched straight back and the CSV was looked up
# in the original working directory rather than on the Desktop.
#
# GEOID is converted to text and left-padded with zeros to the five
# characters of a county FIPS code so that it matches the GEOID column
# returned by tidycensus (e.g. 8109 -> "08109"). Padding to a fixed width
# gives the same result as prefixing "0" to a four-digit code, but does not
# corrupt a value that already has five characters.
co_covid <- read_csv(file.path(path.expand("~/Desktop"), "co_covid.csv")) %>%
  mutate(GEOID = as.character(GEOID)) %>%
  mutate(GEOID = str_pad(GEOID, width = 5, side = "left", pad = "0"))
View(co_covid)
| OBJECTID | FULL_ | GEOID | LABEL | STAETFP | COUNTY | COUNTYFP | County_Pos_Cases | County_Population | County_Rate_Per_100_000 | County_Pos_Cases_Yesterday | County_Pos_Cases_Change | County_Deaths | County_Deaths_Yesterday | County_Deaths_Change | State_Pos_Cases | State_Population | State_Rate_Per_100000 | State_Deaths | State_CDC_Deaths | State_Number_Hospitalizations | State_Number_Tested | State_Test_Encounters | State_Number_of_Counties_Pos | State_Number_of_Outbreaks | Data_Source | Date_Data_Last_Updated | Shape__Area | Shape__Length |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Saguache County | 08109 | Saguache | 8 | SAGUACHE | 109 | 327 | 6824 | 4791.91 | 327 | 0 | 4 | 4 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.8431490 | 4.391844 |
| 2 | Sedgwick County | 08115 | Sedgwick | 8 | SEDGWICK | 115 | 197 | 2229 | 8838.04 | 196 | 1 | 2 | 2 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.1520111 | 1.707546 |
| 3 | Cheyenne County | 08017 | Cheyenne | 8 | CHEYENNE | 17 | 125 | 1825 | 6849.32 | 125 | 0 | 5 | 5 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.4786278 | 3.111271 |
| 4 | Custer County | 08027 | Custer | 8 | CUSTER | 27 | 165 | 5059 | 3261.51 | 164 | 1 | 1 | 1 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.1968592 | 2.364502 |
| 5 | La Plata County | 08067 | La Plata | 8 | LA PLATA | 67 | 3169 | 56272 | 5631.58 | 3161 | 8 | 37 | 37 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.4472778 | 3.022820 |
| 6 | San Juan County | 08111 | San Juan | 8 | SAN JUAN | 111 | 43 | 726 | 5922.87 | 43 | 0 | NA | NA | NA | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.1028337 | 1.716641 |
# Keep only the counties present in both tables: the nationwide
# median-income table and the Colorado Covid table, matched on GEOID.
co_covid_medianincome <- inner_join(
  median_income,
  co_covid,
  by = "GEOID"
)
View(co_covid_medianincome)
| GEOID | NAME | variable | median_income | moe | OBJECTID | FULL_ | LABEL | STAETFP | COUNTY | COUNTYFP | County_Pos_Cases | County_Population | County_Rate_Per_100_000 | County_Pos_Cases_Yesterday | County_Pos_Cases_Change | County_Deaths | County_Deaths_Yesterday | County_Deaths_Change | State_Pos_Cases | State_Population | State_Rate_Per_100000 | State_Deaths | State_CDC_Deaths | State_Number_Hospitalizations | State_Number_Tested | State_Test_Encounters | State_Number_of_Counties_Pos | State_Number_of_Outbreaks | Data_Source | Date_Data_Last_Updated | Shape__Area | Shape__Length |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 08035 | Douglas County, Colorado | B19013_001 | 115314 | 2028 | 52 | Douglas County | Douglas | 8 | DOUGLAS | 35 | 22523 | 351528 | 6407.17 | 22517 | 6 | 224 | 225 | -1 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.2280495 | 2.246267 |
| 08039 | Elbert County, Colorado | B19013_001 | 96658 | 4279 | 59 | Elbert County | Elbert | 8 | ELBERT | 39 | 1337 | 26686 | 5010.12 | 1335 | 2 | 12 | 12 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.5005127 | 3.295997 |
| 08014 | Broomfield County, Colorado | B19013_001 | 89624 | 4013 | 21 | Broomfield County | Broomfield | 8 | BROOMFIELD | 14 | 3981 | 70762 | 5625.90 | 3982 | -1 | 74 | 74 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.0091580 | 1.174637 |
| 08037 | Eagle County, Colorado | B19013_001 | 84685 | 4478 | 18 | Eagle County | Eagle | 8 | EAGLE | 37 | 5415 | 55070 | 9832.94 | 5398 | 17 | 22 | 22 | 0 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.4596715 | 3.158051 |
| 08059 | Jefferson County, Colorado | B19013_001 | 78943 | 1142 | 12 | Jefferson County | Jefferson | 8 | JEFFERSON | 59 | 38778 | 583081 | 6650.53 | 38761 | 17 | 732 | 730 | 2 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.2101877 | 2.863442 |
| 08013 | Boulder County, Colorado | B19013_001 | 78642 | 1583 | 15 | Boulder County | Boulder | 8 | BOULDER | 13 | 19616 | 327164 | 5995.77 | 19606 | 10 | 227 | 228 | -1 | 444712 | 5763976 | 7715.37 | 6048 | 6126 | 24250 | 2654490 | 6597592 | 64 | 4100 | Colorado Department of Public Health and Environment | Data through March 15, 2021 | 0.2025347 | 2.383375 |
# Scatterplot of each county's Covid case rate against its median income.
co_covid_medianincome_viz <-
  ggplot(co_covid_medianincome) +
  geom_point(aes(x = median_income, y = County_Rate_Per_100_000)) +
  ylab("County Covid+ Rate Per 100,000") +
  xlab("County Median Income, 2018 (ACS Estimate)")
co_covid_medianincome_viz

# Correlation test between median income and the case rate (cor.test's
# default method), stored and then printed.
income_cases_corr <- cor.test(
  co_covid_medianincome$median_income,
  co_covid_medianincome$County_Rate_Per_100_000
)
income_cases_corr
##
## Pearson's product-moment correlation
##
## data: co_covid_medianincome$median_income and co_covid_medianincome$County_Rate_Per_100_000
## t = -1.5692, df = 62, p-value = 0.1217
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4210236 0.0529104
## sample estimates:
## cor
## -0.1954408
Student exercise: Make a scatterplot of the percentage of Colorado counties’ populations that are non-white and non-Hispanic (based on the 2015-2019 ACS) against their Covid positivity rates per 100,000
# For each Colorado county (ACS request with year = 2019), fetch B03002_003
# together with the B03002_001 table total as `summary_est`, then derive:
#   white_pct    = B03002_003 as a percentage of the table total
#   nonwhite_pct = the remainder, 100 - white_pct
acs_2019_nonwhite_COcounties <-
  get_acs(
    year = 2019,
    state = "CO",
    geography = "county",
    variables = "B03002_003",
    summary_var = "B03002_001"
  ) %>%
  mutate(
    white_pct = (estimate / summary_est) * 100,
    nonwhite_pct = 100 - white_pct
  )
## Getting data from the 2015-2019 5-year ACS
# Match the county shares to the Covid table on GEOID, keeping only counties
# that appear in both.
co_covid_race <- inner_join(
  acs_2019_nonwhite_COcounties,
  co_covid,
  by = "GEOID"
)

# Scatterplot of the Covid case rate against the non-white share.
co_covid_race_viz <-
  ggplot(co_covid_race) +
  geom_point(aes(x = nonwhite_pct, y = County_Rate_Per_100_000)) +
  ylab("County Covid+ Rate Per 100,000") +
  xlab("Non-White/Non-Hispanic Share of County Population, 2019 (ACS Estimate)")

# Correlation test between the non-white share and the case rate
# (cor.test's default method).
nonwhite_cases_corr <- cor.test(
  co_covid_race$nonwhite_pct,
  co_covid_race$County_Rate_Per_100_000
)

# Print the plot first, then the test result.
co_covid_race_viz
nonwhite_cases_corr
##
## Pearson's product-moment correlation
##
## data: co_covid_race$nonwhite_pct and co_covid_race$County_Rate_Per_100_000
## t = 1.8286, df = 62, p-value = 0.07227
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02075027 0.44715781
## sample estimates:
## cor
## 0.2262128